/***************************************************************************
 *
 * Copyright (C) 2001 International Business Machines
 * All rights reserved.
 *
 * This file is part of the GPFS mmfslinux kernel module.
 *
 * Redistribution and use in source and binary forms, with or without 
 * modification, are permitted provided that the following conditions 
 * are met:
 *
 *  1. Redistributions of source code must retain the above copyright notice, 
 *     this list of conditions and the following disclaimer. 
 *  2. Redistributions in binary form must reproduce the above copyright 
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution. 
 *  3. The name of the author may not be used to endorse or promote products 
 *     derived from this software without specific prior written
 *     permission. 
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 *************************************************************************** */
/* $Id: tracedev.c,v 1.35.4.1 2002/05/21 21:44:58 dcraft Exp $
 *
 * $Log: tracedev.c,v $
 * Revision 1.35.4.1  2002/05/21 21:44:58  dcraft
 * Pull GPFS 1.2.1 up to kernel 2.4.18.
 * mmfsfuncs.Linux must be distributed with /usr/lpp/mmfs/src
 * on developerworks.
 *
 * Revision 1.35  2001/08/08 16:31:48  gjertsen
 * Need to include additional header file.
 *
 * Revision 1.34  2001/07/19 23:25:07  dcraft
 * Modified linux trace to allow non blocking trace record
 * writes (format is TRACE?N).  New gpfs swapd process created
 * which is responsible for reclaiming inodes (5 percent every
 * time it runs).  Marked all our inodes so that they would be
 * ignored by linux kswapd.  Added "unused" inode to inode
 * cache that could be used as a signal that linux kswapd is
 * running and kick off gpfs swapd.  Added means to ignore attempts
 * to kill mmfsd by kswapd if the system gets low on memory.
 * All done in an attempt to avoid kswapd premature wakeup on simple
 * locks and mutexes.
 *
 * Revision 1.33  2001/07/09 16:01:37  wyllie
 * Fix compiler warning
 *
 * Revision 1.32  2001/06/14 18:14:12  gjertsen
 * Initial changes for IA64 beta RH 7.1 with 2.4.3-3 kernel. Get GPFS_PRINTF
 * working again.
 *
 * Revision 1.31  2001/05/25 14:48:24  gjertsen
 * Minor fixes to get IA64 code to compile again.
 *
 * Revision 1.30  2001/04/20 19:09:17  gjertsen
 * Ensure mmfslinux modules specified for a particular configuration
 * will be loaded by insmod. This is done by overriding the build environment
 * settings and ensuring the first version entry in the modinfo section is correct.
 *
 * Revision 1.29  2001/04/12 21:22:01  dixonbp
 * Detect calls from interrupt level so we don't attempt to block.
 *
 * Revision 1.28  2001/03/13 23:31:11  eshel
 * check for bad address for string
 *
 * Revision 1.27  2001/01/27 20:37:51  dcraft
 * Move trace device header definition so it can be used by lcrash
 * Export header symbol.
 *
 * Revision 1.26  2000/12/18 13:53:21  gjertsen
 * More cleanup of comments/documentation.
 *
 * Revision 1.25  2000/12/15 13:56:52  gjertsen
 * Clean up documentation.
 *
 */

/**************************************************************************
 *
 * Loadable kernel module that implements the trace device. 
 *
 *
 *
 **************************************************************************/

#ifndef GPFS_PRINTF

#ifndef __KERNEL__
#  define __KERNEL__
#endif

#ifndef CUSTOM_LINUX_BUILD
/* Configure tracedev as SMP to ensure atomic operations work on either
   a UP or SMP platform for the standard build case */
#undef CONFIG_SMP
#undef CONFIG_UP
#define CONFIG_SMP 1
#endif

#include <Shark-gpl.h>

#include <linux/version.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/smp_lock.h>
#include <linux/string.h> /* memset, memcpy, strlen */
#include <linux/vmalloc.h>

#include <asm/hardirq.h> /* in_interrupt */
#include <asm/uaccess.h> /* copy_to/from_user */

#if !defined(MODULE)
#define MOD_INC_USE_COUNT
#define MOD_DEC_USE_COUNT
#endif /* !MODULE */

#include <stdarg.h>
#include <Trace.h>
#include <lxtrace.h>

/* Task structure of the trace daemon.  It is recorded by trc_open on the
 * O_RDWR open, used as the target of the SIGIO/SIGTERM signals, and compared
 * against the current task in trc_read/trc_close.  It is NULL whenever no
 * daemon has the device open. */
static struct task_struct *taskP; 

/* The writeLock serializes trace writers as well as most other
 * access to the trace header element.  It should be avoided by the read
 * operation in order to allow the daemon to proceed to the data unimpeded. 
 * It is a spinlock when TRACE_SPIN is defined and a semaphore otherwise.
 */
#ifdef TRACE_SPIN
static spinlock_t writeLock;
#else
static struct semaphore writeLock;
#endif

/* The readLock serializes read access to the trace records (lxthe.readBuf)
 * and to the state field.
 * Whenever both readLock and writeLock are required, writeLock is always
 * to be acquired first. 
 */
#ifdef TRACE_SPIN
static spinlock_t readLock;
#else
static struct semaphore readLock;  
#endif

/* Trace Header Element - THE anchor for the trace state */
static trcdev_header_t   lxthe;

/* Queue on which a blocking writer sleeps (see trc_signal_io) until
 * trc_read has drained the read buffer. */
static wait_queue_head_t daemonWaitQ;

/* Export pointers to internal data structures for debugging.  The
 * initializer order must match the member order below. */
struct
{
  trcdev_header_t   *lxtheP;        /* -> lxthe */
  wait_queue_head_t *daemonWaitQP;  /* -> daemonWaitQ */
#ifdef TRACE_SPIN
  spinlock_t        *readLockP;     /* -> readLock */
  spinlock_t        *writeLockP;    /* -> writeLock */
#else
  struct semaphore  *readLockP;     /* -> readLock */
  struct semaphore  *writeLockP;    /* -> writeLock */
#endif
} TraceVarAddrs = { &lxthe, &daemonWaitQ, &readLock, &writeLock };

/* A trcdev_buffer is dirty if there is any data in it (nextP != beginP) AND the
 * dirtyP has not yet been advanced (by trc_read) past the data (to nextP).
 * The argument is an lvalue of buffer type (not a pointer); it is fully
 * parenthesized so expressions such as IS_DIRTY(*bufP) expand correctly. */
#define IS_DIRTY(b) (((b).nextP != (b).beginP) && ((b).nextP != (b).dirtyP))

/* Lock operations.  These are used to synchronize the device operations.
 * In particular, they provide serialization to the Trace Header Element. 
 * With TRACE_SPIN the locks are spinlocks, otherwise they are semaphores
 * initialized to 1.  TRY_ACQUIRE_OK evaluates to non-zero when the lock was
 * obtained without blocking.
 */
#ifdef TRACE_SPIN
#define LOCK_INIT(s)      (s) = SPIN_LOCK_UNLOCKED
#define ACQUIRE(s)        spin_lock(&(s))
#define RELEASE(s)        spin_unlock(&(s))
#define TRY_ACQUIRE_OK(s) (prim_spin_lock_nb(&(s))==1) 
#else
#define LOCK_INIT(s)      sema_init(&(s), 1)
#define ACQUIRE(s)        down(&(s))
#define RELEASE(s)        up(&(s))
#define TRY_ACQUIRE_OK(s) (down_trylock(&(s))==0)
#endif

/* Updating state information requires the readLock in addition to 
 * writeLock.  The writeLock is widely held where the trace header is 
 * manipulated, but for the brief period of updating the state field, get the 
 * readLock as well. 
 * Wrapped in do { } while (0) so that "TRC_STATE(x);" is a single statement
 * and remains safe inside an unbraced if/else.
 */
#define TRC_STATE(S) \
  do { ACQUIRE(readLock); lxthe.state = (S); RELEASE(readLock); } while (0)

/* Parse the leading hexadecimal digits of s (upper or lower case, no "0x"
 * prefix) and return their value.  Conversion stops at the first character
 * that is not a hex digit, including the terminating NUL; a string with no
 * leading hex digit yields 0. */
int axtoi(char s[])
{
  const char *cursor = s;
  int value = 0;

  for (;;)
  {
    char c = *cursor++;
    int digit;

    if (c >= '0' && c <= '9')
      digit = c - '0';
    else if (c >= 'A' && c <= 'F')
      digit = c - 'A' + 10;
    else if (c >= 'a' && c <= 'f')
      digit = c - 'a' + 10;
    else
      break;  /* NUL or any non-hex character ends the number */

    value = 16 * value + digit;
  }

  return value;
}

/* Report whether the given trace id belongs to a hookword that is currently
 * being traced.  The hookword occupies the high-order three hex digits of the
 * trace id.  A table entry of zero matches every hookword.  Returns 1 on a
 * match and 0 otherwise. */
int isTraced(uint hw)
{
  const uint hookId = (hw & 0xFFF00000) >> 20;
  int idx = 0;

  while (idx < lxthe.nHooks)
  {
    if (lxthe.hookP[idx] == 0 || lxthe.hookP[idx] == hookId)
      return 1;
    idx++;
  }

  return 0;
}

/* Verify access and copy `size` bytes from user space (`from`) into the
 * kernel buffer `to`.
 *
 * Returns 0 when every byte was copied, -EFAULT when the user range fails
 * access_ok() or when copy_from_user() reports bytes it could not copy.
 * (copy_from_user returns the number of bytes left uncopied, so any non-zero
 * result means the destination is only partially filled.) */
int copyin_check(const char *from, char *to, int size)
{
  if (!access_ok(VERIFY_READ, from, size))
    return -EFAULT;

  if (copy_from_user(to, from, size) != 0)
    return -EFAULT;

  return 0;
}
/* Verify access and copy `size` bytes from the kernel buffer `from` out to
 * user space (`to`).
 *
 * Returns 0 when every byte was copied, -EFAULT when the user range fails
 * access_ok() or when copy_to_user() reports bytes it could not copy. */
int copyout_check(const char *from, char *to, int size)
{
  if (!access_ok(VERIFY_WRITE, to, size))
    return -EFAULT;

  if (copy_to_user(to, from, size) != 0)
    return -EFAULT;

  return 0;
}


/* Construct the static trace header element ("the"): initialize both locks,
 * clear the counters, the daemon task pointer and the buffer descriptors,
 * and leave the device in state trc_initialized.
 * trc_open will allocate buffers and set the appropriate values. */
void trc_init()
{
  LOCK_INIT(writeLock);
  LOCK_INIT(readLock);

  ACQUIRE(writeLock);

  /* No daemon yet, and nobody waiting for it. */
  taskP = NULL;
  init_waitqueue_head(&daemonWaitQ);
  lxthe.devWaiting = 0;

  /* Device numbers: major 0 requests dynamic assignment (by
   * register_chrdev in trc_register). */
  lxthe.major = 0;
  lxthe.minor = 0;

  /* Statistics and sizing. */
  lxthe.nOpens = 0;
  lxthe.nWaits = 0;
  lxthe.nBuffers = 0;
  lxthe.bufSize = 0;

  /* No buffers are allocated until the daemon opens the device. */
  lxthe.writeBuf.dirtyP = NULL;
  lxthe.writeBuf.nextP = NULL;
  lxthe.writeBuf.endP = NULL;
  lxthe.writeBuf.beginP = NULL;

  /* The read buffer descriptor and the state field are both guarded by the
   * readLock, so update them together. */
  ACQUIRE(readLock);
  lxthe.readBuf = lxthe.writeBuf;
  lxthe.state = trc_initialized;
  RELEASE(readLock);

  RELEASE(writeLock);
}

/* Destroy the static trace header element (the): free the trace buffers and
 * reset every field to its initial value. */
void trc_term()
{
  ACQUIRE(writeLock);
  ACQUIRE(readLock);

  /* The two buffers are allocated together (one vmalloc covering both), and
   * they may have been swapped since, so the lower of the two begin
   * addresses is the start of the allocation.  Free them both here. */
  if (lxthe.writeBuf.beginP)
    vfree(MIN(lxthe.writeBuf.beginP, lxthe.readBuf.beginP));

  /* Release the locks.  The state is still trc_unregistered and trc_init
   * will obtain them again before finishing the cleanup.
   */
  RELEASE(readLock);
  RELEASE(writeLock);

  /* (Re)initialize all fields.  Rather than copy all the stuff that happens 
   * in trc_init, call it here to reset everything; this also clears the
   * buffer pointers that were just freed.  (The original statement was the
   * bare expression "trc_init;", which never performed the call.) */
  trc_init();
}

/* The device open operation.
 *
 * O_RDWR   - the trace daemon turning trace on.  Only permitted in state
 *            trc_registered (otherwise -EALREADY) and only for minor 0
 *            (otherwise -ENODEV).  Allocates the dual trace buffers
 *            (-ENOMEM on failure), records the daemon task so it can be
 *            signalled, and moves the state to trc_opened.
 * O_WRONLY - an application.  Always granted; writes are no-ops until the
 *            state becomes trc_active.
 * other    - trace control commands issued after "trace on".  Only permitted
 *            in state trc_active (otherwise -EALREADY).
 *
 * On success the open count and the module use count are incremented and 0
 * is returned. */
int trc_open(struct inode *inodeP, struct file *fileP)
{
  const unsigned int accMode = fileP->f_flags & O_ACCMODE;
  int rc = 0;

  ACQUIRE(writeLock);

  if (accMode == O_RDWR)
  {
    /* Only the daemon opens the device O_RDWR, and only when turning
     * trace on. */
    if (lxthe.state != trc_registered)
    {
      rc = -EALREADY;
      goto exit;
    }

    /* The first open (lxtrace on) requires initialization of the header.
     * Only one such device is supported. */
    lxthe.minor = MINOR(inodeP->i_rdev) & 0xf;
    if (lxthe.minor > 0)
    {
      rc = -ENODEV;
      goto exit;
    }

    /* If not configured otherwise, use the default buffer size. */
    if (lxthe.bufSize == 0)
      lxthe.bufSize = DEF_TRC_BUFSIZE;

    /* One allocation holds both buffers: new records go into the write
     * buffer (first half), and the daemon reads (via trc_read) from the
     * read buffer (second half). */
    lxthe.writeBuf.beginP = vmalloc(2*lxthe.bufSize);
    if (!lxthe.writeBuf.beginP)
    {
      rc = -ENOMEM;
      goto exit;
    }
    lxthe.writeBuf.dirtyP = lxthe.writeBuf.beginP;
    lxthe.writeBuf.nextP = lxthe.writeBuf.beginP;
    lxthe.writeBuf.endP = lxthe.writeBuf.beginP + lxthe.bufSize - 1;

    ACQUIRE(readLock);
    lxthe.readBuf.beginP = lxthe.writeBuf.beginP + lxthe.bufSize; 
    lxthe.readBuf.dirtyP = lxthe.readBuf.beginP;
    lxthe.readBuf.nextP = lxthe.readBuf.beginP;
    lxthe.readBuf.endP = lxthe.readBuf.beginP + lxthe.bufSize - 1;
    RELEASE(readLock);

    /* Remember who the daemon is and mark the device opened. */
    taskP = current;
    TRC_STATE(trc_opened);
  }
  else if (accMode != O_WRONLY && lxthe.state != trc_active)
  {
    /* After "trace on", subsequent trace control commands open O_RDONLY;
     * before that there is nothing for them to control. */
    rc = -EALREADY;
    goto exit;
  }

  /* O_WRONLY application opens need no further processing. */

  lxthe.nOpens += 1;
  MOD_INC_USE_COUNT;

exit:
  RELEASE(writeLock);
  return rc;
}

/* The device read operation.  This is to be used only by the trace daemon to
 * retrieve a filled trace buffer for the purposes of writing it to the
 * output file.
 *
 * Returns the number of bytes copied to bufP, 0 when there is nothing to
 * read (or the copy to user space failed), -EPERM when the caller is not the
 * registered daemon, and -EFBIG when the caller's buffer is smaller than the
 * pending data. */
ssize_t trc_read(struct file *fileP, char *bufP, size_t nBytes, loff_t *ppos)
{
  ssize_t result = 0;
  ssize_t pending = 0;

  /* All access to lxthe.readBuf, and to the state field examined below, is
   * protected via the readLock (see the TRC_STATE macro).  The writeLock is
   * deliberately not taken so that trace writers are not held up. */
  ACQUIRE(readLock);

  if (taskP && taskP->pid != current->pid)
  {
    /* Only the trace daemon is allowed to read. */
    result = -EPERM;
  }
  else if ((lxthe.state == trc_active || lxthe.state == trc_closed) &&
           IS_DIRTY(lxthe.readBuf))
  {
    /* Data is waiting.  Reading is allowed during normal operation
     * (trc_active) and while trace is terminating (trc_closed), the latter
     * to retrieve the last group of buffered records. */
    pending = lxthe.readBuf.nextP - lxthe.readBuf.beginP;

    if (pending > nBytes)
    {
      /* The caller's buffer cannot hold what we have. */
      result = -EFBIG;
    }
    else if (copyout_check(lxthe.readBuf.dirtyP, bufP, pending) == 0)
    {
      /* Mark the buffer consumed and release any writer waiting for it. */
      lxthe.readBuf.dirtyP += pending;
      result = pending;

      if (lxthe.devWaiting)
        wake_up(&daemonWaitQ);
    }
  }

  RELEASE(readLock);
  return result;
}

/* Internal routine to signal the trace daemon (taskP) to shut down by
 * sending it SIGTERM.
 * NOTE that this routine is called while holding the writeLock. */
void trc_signal_term()
{
  struct siginfo sigData;

  /* Clear the whole structure first so that the fields not set below do not
   * carry uninitialized kernel stack contents into the signal delivery. */
  memset(&sigData, 0, sizeof(sigData));

  sigData.si_signo = SIGTERM;
  sigData.si_errno = 0;
  sigData.si_code  = SI_KERNEL;
  send_sig_info(SIGTERM, &sigData, taskP);
}

/* Internal routine to schedule i/o of the trace buffer: swap the write and
 * read buffers, signal the daemon (SIGIO) if tracing is active, and reset the
 * new write buffer to empty.
 *
 * cantBlock != 0: never sleep.  Returns ENOSPC (a positive value) if the
 *                 readLock cannot be taken immediately or the read buffer
 *                 has not been drained yet.
 * cantBlock == 0: wait on daemonWaitQ until trc_read has drained the read
 *                 buffer.
 * Returns 0 once the buffers have been switched.
 *
 * NOTE that this routine is called while holding the writeLock. */
int trc_signal_io(int cantBlock)
{
  trcdev_buffer_t tBufP;   /* temporary used for the swap (a buffer
                              descriptor, not a pointer, despite the name) */
  struct siginfo sigData;
  int rc;                  /* NOTE(review): never used in this function */

  /* Switch buffers and flush the full one. */

  /* If the previous read buffer hasn't been processed by the daemon yet,
   * switching now would lose the records it contains.  The non-blocking
   * path therefore gives up, and the blocking path waits; the waits are
   * counted in nWaits, which can be queried by ioctl(trc_dump).  */

  if (cantBlock)
  {
    if (!TRY_ACQUIRE_OK(readLock))
      return ENOSPC;
      
    if (IS_DIRTY(lxthe.readBuf))
    {
      RELEASE(readLock);
      return ENOSPC;
    }
  }
  else
  {
    ACQUIRE(readLock);

    while (IS_DIRTY(lxthe.readBuf))
    {
      lxthe.nWaits += 1;
      lxthe.devWaiting = 1;
      /* NOTE(review): the readLock is dropped before sleep_on(), so a
       * wake_up() issued by trc_read in between appears able to be missed
       * -- confirm whether this lost-wakeup window matters in practice. */
      RELEASE(readLock);
      sleep_on(&daemonWaitQ);
      ACQUIRE(readLock);
      lxthe.devWaiting = 0;
    }
  }

  lxthe.nBuffers += 1; /* Number of buffers filled */

  /* Switch the buffers (descriptor copy; the data itself does not move) */
  tBufP = lxthe.readBuf;
  lxthe.readBuf = lxthe.writeBuf;
  lxthe.writeBuf= tBufP;

  RELEASE(readLock);

  if (lxthe.state == trc_active)
  {
    /* Signal the daemon that there is a trace buffer to be read and
       processed.  If not active, this is an fsync after ioctl(trc_end), in
       which case the daemon already knows it's coming. */
    sigData.si_signo = SIGIO;
    sigData.si_errno = 0;
    sigData.si_code  = SI_KERNEL;
    send_sig_info(SIGIO, &sigData, taskP);
  }

  /* Reset the (new) writeBuf to a clean state.  Only the writeLock, held by
   * the caller, is needed for the write buffer. */
  lxthe.writeBuf.dirtyP = lxthe.writeBuf.nextP = lxthe.writeBuf.beginP;

  return 0;
}

/* Write to the trace device (i.e., write a trace record).
 *
 * fileP     - NULL for a call from kernel code (bufP is a kernel address);
 *             non-NULL for a user write (bufP is a user address).
 * bufP      - the trace record; it MUST begin with a hookword number.
 * nBytes    - length of the record, at most LXTRACE_MAX_DATA.
 * posP      - unused.
 * cantBlock - non-zero if the caller must not sleep: the record is dropped
 *             if the writeLock or a free buffer is not immediately
 *             available.
 *
 * Returns nBytes if the record was appended to the write buffer, 0 if it was
 * dropped (interrupt context, oversized record, tracing not active, hookword
 * not traced, copy failure, or no buffer space). */
ssize_t 
trc_write_internal(struct file *fileP, const char *bufP,
                   size_t nBytes, loff_t *posP, int cantBlock)
{
  int nDone = 0;
  trc_header_t hdr;
  size_t recLen;   /* header plus application data */

  /* Trace calls from interrupt level are not supported. */
  if (in_interrupt())
    return nDone;

  /* Too much trace data appears to have been passed in */
  if (nBytes > LXTRACE_MAX_DATA)
    return nDone;

  recLen = nBytes + sizeof(trc_header_t);

  /* Construct the trace record header */
  hdr.trMagic = LXTRACE_MAGIC;
  do_gettimeofday(&hdr.trTime);
  hdr.trProcess = current->pid;
#if LINUX_KERNEL_VERSION >= 2041800
  hdr.trCPU = smp_processor_id();
#else 
  hdr.trCPU = current->processor;
#endif
  hdr.trLength = nBytes;

  if (cantBlock)
  {
    if (!TRY_ACQUIRE_OK(writeLock))
      return nDone;
  }
  else
    ACQUIRE(writeLock);

  if (lxthe.state == trc_active)
  {
    /* If there is not enough room in the trace buffer for this record, schedule
     * it for io and (after the buffers have been swapped) append the record. 
     */
    if ((lxthe.writeBuf.nextP + recLen) > lxthe.writeBuf.endP)
    {
      if (trc_signal_io(cantBlock))
        goto xerror;

      /* The buffer size is configurable (ioctl trc_bufSize), so even the
       * freshly emptied buffer may be too small for this record.  Drop the
       * record rather than write past the end of the buffer. */
      if ((lxthe.writeBuf.nextP + recLen) > lxthe.writeBuf.endP)
        goto xerror;
    }

    /* Insert the header stamp into the buffer ahead of the 
     * application record. 
     */
    memcpy(lxthe.writeBuf.nextP, &hdr , sizeof(trc_header_t));
    lxthe.writeBuf.nextP += sizeof(trc_header_t); 

    /* Move the application trace record directly into the trace buffer */

    /* The beginning of the trace record MUST be a hookword number. 
     * Verify the hookword specified is being traced.  If it is, advance
     * the nextP pointer in the write buffer; otherwise back nextP up over
     * the header so the record is discarded.
     */
    if (!fileP) 
    { /* Kernel call */
      memcpy(lxthe.writeBuf.nextP, bufP, nBytes);
      if (isTraced(*((uint *)lxthe.writeBuf.nextP)))
      {
        nDone = nBytes;
        lxthe.writeBuf.nextP += nDone;
      }
      else
      {
        lxthe.writeBuf.nextP -= sizeof(trc_header_t);
      }
    }
    else 
    { /* User call */
      if ((0 == copyin_check(bufP, lxthe.writeBuf.nextP, nBytes)) &&
          (isTraced(*((uint *)lxthe.writeBuf.nextP))))
      {
        nDone = nBytes;
        lxthe.writeBuf.nextP += nDone;
      }
      else
      {
        /* Move failed, or hw wasn't traced, remove the header */
        lxthe.writeBuf.nextP -= sizeof(trc_header_t);
      }
    }
  }

xerror:
  RELEASE(writeLock);
  return nDone;
}

/* The device write operation: a blocking trace-record write.  All of the
 * work is done by trc_write_internal, called with cantBlock set to false. */
ssize_t 
trc_write(struct file *fileP, const char *bufP,
          size_t nBytes, loff_t *posP)
{
  const int cantBlock = false;

  return trc_write_internal(fileP, bufP, nBytes, posP, cantBlock);
}

/* The device fsync operation.  Before close, a sync of the trace device
 * hands the records still sitting in the write buffer (even though it might
 * not be full) over to the daemon.  A close without this call could result
 * in the loss of these records.  None of the arguments are used; the return
 * value is always 0. */
int trc_fsync_internal(struct file* fileP, struct dentry* dP, int datasync)
{
  int flushAllowed;

  ACQUIRE(writeLock);

  /* fsync is honoured during normal operation OR after ioctl(trc_end) has
     disabled further trace writing (allows an fsync before close to
     flush the buffered records). */
  flushAllowed = (lxthe.state == trc_active) || (lxthe.state == trc_closed);

  /* We could check IS_DIRTY(lxthe.readBuf) and reject the fsync here.
   * The reason would be that such an operation would lose the data
   * that is already in the read buffer.  If we do this, be sure to
   * acquire the readLock around the check.  */

  /* If there is data in the current write buffer, switch buffers and
   * signal the daemon to process it. */
  if (flushAllowed && lxthe.writeBuf.nextP != lxthe.writeBuf.beginP)
    trc_signal_io(false);

  RELEASE(writeLock);
  return 0;
}


/* The externally visible version of trc_fsync_internal, for callers that
 * have no file or dentry to pass. */
int trc_fsync()
{
  int rc = trc_fsync_internal(NULL, NULL, 0);

  return rc;
}


/* The device close (release) operation.  Three kinds of close are
 * distinguished:
 *   - the "trace off" command (an O_RDONLY descriptor closed while the state
 *     is trc_stopped): the state becomes trc_closed and the daemon is sent
 *     SIGTERM;
 *   - the daemon itself: counters are reset and the state returns to
 *     trc_registered, ready for the next "trace on";
 *   - anything else: no action.
 * In every case the open count and module use count are decremented.
 * Always returns 0. */
int trc_close(struct inode *inodeP, struct file *fileP)
{
  int traceOffClose;

  ACQUIRE(writeLock);

  /* The "trace off" command, having opened the device O_RDONLY, stopped
   * tracing via ioctl(trc_end) and is now releasing its descriptor. */
  traceOffClose = ((fileP->f_flags & O_ACCMODE) == O_RDONLY) &&
                  (lxthe.state == trc_stopped);

  if (traceOffClose)
  {
    /* Signal the daemon to terminate.  (btw, nOpens should be >= 2) */
    TRC_STATE(trc_closed);
    trc_signal_term();
  }
  else if (taskP != NULL && taskP->pid == current->pid)
  {
    /* The trace daemon only closes the device upon termination, so this is
     * the final daemon close.  Reset for subsequent use. */
    lxthe.nWaits = 0;
    lxthe.nBuffers = 0;
    lxthe.devWaiting = 0;
    taskP = NULL;
    TRC_STATE(trc_registered);
  }
  /* Otherwise an application or a secondary open gave up its descriptor
   * without turning trace off; nothing to do. */

  lxthe.nOpens -= 1;
  MOD_DEC_USE_COUNT;

  RELEASE(writeLock);
  return 0;
}


/* ioctl op used to for low-level access to trace operation. */
int trc_ioctl(struct inode *inodeP, struct file *fileP,
                  unsigned int op, unsigned long kx_args)
{
  int h, rc = -EFAULT; /* access_ok failure */
  struct kArgs args_cp;
  struct kArgs *args = (struct kArgs *)kx_args;
  char hwsP[LXTRACE_HW_STRING_LEN];
  char * newBufP = NULL;
  char * trc_dumpP = NULL;
  char * trc_nextP = NULL;

  char *hwP;

  ACQUIRE(writeLock);

  switch (op) {

    case trc_begin:
      if (lxthe.state == trc_active)
        rc = -EALREADY; 
      else if (lxthe.state != trc_opened)
        rc = -EBADF;
      else
      {
        /* get the argument array */
        if (0 == copyin_check((char*)args, (char*)&args_cp, sizeof(args_cp)))
        {
          if (0==copyin_check((char*)args_cp.arg2, hwsP,
                              MIN(strlen((char *)args_cp.arg2),
                                  LXTRACE_HW_STRING_LEN)))
          {
              lxthe.nHooks = 0;
              hwP = strtok(hwsP, ",");
              while (hwP)
              {
                lxthe.hookP[lxthe.nHooks] = axtoi(hwP);
                lxthe.nHooks += 1;
                hwP = strtok(NULL, ",");

                /* Stop if there are no more tokens (strtok returned null 
                 * above), or if we've reached the maximum hookwords we 
                 * support, or if we've reached the number of hooks the 
                 * caller claimed to be passing. 
                 */
                if ((lxthe.nHooks >= LXTRACE_MAX_HW) || 
                    (lxthe.nHooks >= args_cp.arg1))
                  hwP = NULL;
              }
          }

          TRC_STATE(trc_active);
          rc = 0;
        }
      }
      break;

    case trc_end:
      if (lxthe.state != trc_active)
        rc = -EBADF;
      else
      {
        lxthe.nHooks = 0;
        TRC_STATE(trc_stopped);
        rc = 0;
      }
      break;

    case trc_bufSize:

      /* The daemon may call ioctl to change the desired buffer size.
         On open, buffers of the default size are allocated.  This call
         frees the current buffers (replacing them with new ones).  Any
         trace records currently in the buffers will be lost. */

      if (lxthe.state == trc_opened)
      {
        /* get the argument array */
        if (0 == copyin_check((char*)args, (char*)&args_cp, sizeof(args_cp)))
        {
          /* Allocate the new (dual) trace buffers. */
          /* arg1 is the requested buffer size */

          newBufP = vmalloc(2*args_cp.arg1);
          if (!newBufP)
          {
            rc = -ENOMEM;
          }
          else
          {
            /* NOTE that the lock makes sure the daemon is not reading the 
             * buffer to be freed and any new read that finds an empty read 
             * buffer is handled in trc_read 
             */
            ACQUIRE(readLock);

            /* Since the state is no yet "active", there shouldn't be any dirty
             * buffers so far. 
             */
            if ((!IS_DIRTY(lxthe.readBuf)) && (!IS_DIRTY(lxthe.writeBuf)))
            {
              /* free the previous buffers */
              if (lxthe.writeBuf.beginP)
                vfree(MIN(lxthe.writeBuf.beginP, lxthe.readBuf.beginP));

              lxthe.bufSize = args_cp.arg1;
              lxthe.writeBuf.beginP = newBufP;

              lxthe.writeBuf.endP = lxthe.writeBuf.beginP + lxthe.bufSize - 1;
              lxthe.writeBuf.nextP = lxthe.writeBuf.beginP;
              lxthe.writeBuf.dirtyP = lxthe.writeBuf.beginP;
       
              lxthe.readBuf.beginP = lxthe.writeBuf.beginP + lxthe.bufSize;
              lxthe.readBuf.endP = lxthe.readBuf.beginP + lxthe.bufSize - 1;
              lxthe.readBuf.nextP = lxthe.readBuf.beginP;
              lxthe.readBuf.dirtyP = lxthe.readBuf.beginP;
            }
            RELEASE(readLock);

            rc = 0; /* successful completion */
          }
        }
      }
      break;

    case trc_dump:

      /* format trace header information and return to daemon */
      trc_nextP = trc_dumpP = vmalloc(LXTRACE_DUMP_SIZE);
      if (!trc_nextP)
      {
        rc = -ENOMEM;
      }
      else
      {
        /* Already have writeLock.  Need read as well. */
        ACQUIRE(readLock);
        /* Format the state information suitable for displaying by 
         * the daemon. 
         */
        sprintf(trc_nextP, "Trace Header Element: 0x%08X\n", &lxthe);
        trc_nextP += strlen(trc_nextP);

        /* Global information on device number, buffer sizes, 
         * and lost records. 
         */
        sprintf(trc_nextP, "  Major %d Minor %d bufSize 0x%X nOpens %d "
                           "nBuffers %d nWaits %d Daemon %d\n",
                           lxthe.major, lxthe.minor, lxthe.bufSize, lxthe.nOpens, 
                           lxthe.nBuffers, 
                           lxthe.nWaits, taskP ? taskP->pid: 0);
        trc_nextP += strlen(trc_nextP);

        /* Append the list of hookwords being traced */
        sprintf(trc_nextP, "  Hooks(%d): ", lxthe.nHooks);
        trc_nextP += strlen(trc_nextP);
        for (h = 0; h < lxthe.nHooks; h++)
        {
          sprintf(trc_nextP, "%X ", lxthe.hookP[h]);
          trc_nextP += strlen(trc_nextP);
        }
        sprintf(trc_nextP, "\n");
        trc_nextP += strlen(trc_nextP);

        /* Append buffer information */
        sprintf(trc_nextP, "  writeBuf: beginP 0x%X endP 0x%X nextP 0x%X "
                           "dirtyP 0x%X isDirty %d\n",
                           lxthe.writeBuf.beginP, lxthe.writeBuf.endP,
                           lxthe.writeBuf.nextP, lxthe.writeBuf.dirtyP, 
                           IS_DIRTY(lxthe.writeBuf));
        trc_nextP += strlen(trc_nextP);

        sprintf(trc_nextP, "  readBuf : beginP 0x%X endP 0x%X nextP 0x%X "
                           "dirtyP 0x%X isDirty %d\n",
                           lxthe.readBuf.beginP, lxthe.readBuf.endP,
                           lxthe.readBuf.nextP, lxthe.readBuf.dirtyP, 
                           IS_DIRTY(lxthe.readBuf));
        trc_nextP += strlen(trc_nextP);

#if 0
        /* verify dumpBuf size */
        sprintf(trc_nextP, "  dumpBuf size %d (used %d)\n",
                LXTRACE_DUMP_SIZE, (trc_nextP-trc_dumpP));
        trc_nextP += strlen(trc_nextP);
#endif 
        RELEASE(readLock);

        /* get the argument array */
        if (0 == copyin_check((char*)args, (char*)&args_cp, sizeof(args_cp)))
        {
          /* arg1 is the user buffer size, arg2 is the address of the bufer */
          rc = copyout_check (trc_dumpP , (char*)args_cp.arg2, 
                              MIN(strlen(trc_dumpP)+1, args_cp.arg1));
        }
      }

      break;

    default:
      rc = -1;
      break;
  }

exit:
  if (trc_dumpP)
    vfree(trc_dumpP);

  RELEASE(writeLock);

  return rc;
}

/* File operations of the trace device.  Only the operations the device
 * implements are named; every other member of this static structure is
 * implicitly NULL. */
static struct file_operations trc_ops =
{
  .read    = trc_read,           /* daemon retrieves buffered records */
  .write   = trc_write,          /* trace points write to the device */
  .ioctl   = trc_ioctl,          /* change buffering or dump state */
  .open    = trc_open,           /* prepare the device for tracing */
  .release = trc_close,          /* terminate tracing and close the device */
  .fsync   = trc_fsync_internal, /* sync all buffered data to the daemon */
};

/* Register the trace device "/dev/trace" and save the major number in 
 * the header.  lxthe.major == 0 requests a dynamically assigned major.
 *
 * Returns 0 on success, in which case the state becomes trc_registered.
 * If register_chrdev fails, its negative return value is passed back to the
 * caller and the state is left unchanged.
 */
int trc_register()
{
  int rc = 0;
  int major;

  ACQUIRE(writeLock);

  major = register_chrdev(lxthe.major, "trace", &trc_ops);
  if (major < 0)
  {
    /* Propagate the failure so module initialization can fail too. */
    rc = major;
    goto exit;
  }

  /* Remember a dynamically assigned major number. */
  if (lxthe.major == 0)
    lxthe.major = major;

  TRC_STATE(trc_registered);

exit:
  RELEASE(writeLock);
  return rc;
}

/* Unregister the trace device */
void trc_unregister()
{
  ACQUIRE(writeLock);
  unregister_chrdev(lxthe.major, "trace");
  TRC_STATE(trc_unregistered);
  RELEASE(writeLock);
}


/* A formatted trace record as built on the stack by the _STrace/_XTrace
 * entry points before it is handed to trc_write_internal.  The whole
 * structure is LXTRACE_MAX_DATA bytes: the data header followed by the
 * argument/string area. */
struct trcRec
{
  trc_datahdr_t hdr;                                 /* hookword, arg count, string info */
  char data[LXTRACE_MAX_DATA-sizeof(trc_datahdr_t)]; /* packed arguments and/or string */
};

/* Build a trace record in *trP from a hookword and a variable argument list.
 *
 * trRecLenP - receives the total record length (data header + data).
 * trP       - record to fill in.
 * hookword  - trace id stored in the header.
 * nArgs     - number of integer arguments (each ARGLEN bytes) in listP.
 * pos       - when 0 <= pos < LXTRACE_MAX_FORMAT_SUBS, the position of the
 *             single string argument among the integer arguments; otherwise
 *             one of the _TR_FORMAT_* codes (_TR_FORMAT_F appends one double
 *             after the integers).
 * listP     - the arguments themselves.
 *
 * Returns 0 on success and -1 if tracing is not active or the arguments
 * cannot fit in the record.
 *
 * NOTE: the ARGLEN/Int32/Int64 macros defined here remain defined for the
 * rest of the file (ARGLEN is used again by XTraceFormat). */
int
STraceFormat(int *trRecLenP, struct trcRec *trP, int hookword, 
             int nArgs, int pos, va_list listP)
{
#ifdef __64BIT__
# define Int64 long long
# define ARGLEN 8
  Int64 tmpint;
#else
# define ARGLEN 4
# define Int32 int
  Int32 tmpint;
#endif /* __64BIT__ */

  int i, len;
  char *p;
  char *s;
  int stringLen;
  int maxStringLen;

  if (lxthe.state != trc_active)
    return -1;

  /* Initialize trace header */
  trP->hdr.trHook = hookword;
  trP->hdr.trNArgs = 0;
  trP->hdr.trSPos = pos;
  trP->hdr.trSLen = 0;

  p = trP->data;
  len = 0;

  /* Test for trace formats that aren't supported yet */
  if ((pos == _TR_FORMAT_I) && (nArgs > LXTRACE_MAX_FORMAT_SUBS))
  {
#ifdef DBGASSERTS
    printk("_STrace: too many arguments (hook %X)\n", trP->hdr.trHook);
#endif /* DBGASSERTS */
    return -1;
  }

  /* Whatever the format, the integer arguments plus either a double or a
   * minimal (ARGLEN-sized) string slot must fit in the data area.  Without
   * this check a large or negative nArgs makes the unsigned string-length
   * bound below wrap around and the copies run off the end of trP->data. */
  if (nArgs < 0 ||
      (size_t)nArgs > (sizeof(trP->data) - sizeof(double)) / ARGLEN)
    return -1;

  /* Append the string argument */
  if (pos >= 0  &&  pos < LXTRACE_MAX_FORMAT_SUBS)
  {
    /* Items (if any) preceding the string argument */
    for (i = 0; i < pos; i++)
    {
#ifdef __64BIT__
      tmpint = va_arg(listP, Int64);
#else
      tmpint = va_arg(listP, Int32);
#endif /* __64BIT__ */

      memcpy(p, &tmpint, ARGLEN);
      p += ARGLEN;
      len += ARGLEN;
      trP->hdr.trNArgs += 1;
    }

    /* Room left for the string: the data area less the integer arguments,
     * the terminating NUL and the padding up to a multiple of ARGLEN.  The
     * check above guarantees this is not negative. */
    maxStringLen = sizeof(trP->data) - (nArgs*ARGLEN) - 1 - (ARGLEN-1);

    /* Copy the string, making sure it does not overflow the buffer */
    s = va_arg(listP, char*);
    if (s < (char*)4096) /* bad address */
    {
      printk("_STrace: bad address 0x%lX  hook 0x%X\n",
             (unsigned long)s, hookword);
      stringLen = strlen("<bad address>");
      stringLen = MIN(stringLen, maxStringLen);
      memcpy(p, "<bad address>", stringLen);
    }
    else
    {
      stringLen = strlen(s);
      stringLen = MIN(stringLen, maxStringLen);
      memcpy(p, s, stringLen);
    }
    p[stringLen] = '\0';
    stringLen += 1;

    /* The string slot is padded to a multiple of ARGLEN. */
    trP->hdr.trSLen = ((stringLen+ARGLEN-1)/ARGLEN)*ARGLEN;
    p += trP->hdr.trSLen;
    len += trP->hdr.trSLen;

    /* Append items following string argument */
    for (i = pos; i < nArgs; i++)
    {
#ifdef __64BIT__
      tmpint = va_arg(listP, Int64);
#else
      tmpint = va_arg(listP, Int32);
#endif /* __64BIT__ */

      memcpy(p, &tmpint, ARGLEN);
      p += ARGLEN;
      len += ARGLEN;
      trP->hdr.trNArgs += 1;
    }
  }
  else /* !IS_SFORMAT */
  {
    /* Place the fixed parameters in the temporary trace buffer */
    for (i = 0; i < nArgs; i++)
    {
#ifdef __64BIT__
      tmpint = va_arg(listP, Int64);
#else
      tmpint = va_arg(listP, Int32);
#endif /* __64BIT__ */

      memcpy(p, &tmpint, ARGLEN);
      p += ARGLEN;
      len += ARGLEN;
      trP->hdr.trNArgs += 1;
    }
  }

  /* Append the float argument */
  if (pos == _TR_FORMAT_F)
  {
    double tmpdbl = va_arg(listP, double);

    memcpy(p, &tmpdbl, sizeof(tmpdbl));
    p += sizeof(tmpdbl);
    len += sizeof(tmpdbl);
  }

  *trRecLenP = sizeof(trc_datahdr_t) + len;
  /* DBGASSERT(*trRecLenP <= LXTRACE_MAX_DATA); */

  return 0;
}


extern void 
_STraceNB(int hookword, int nArgs, int pos, ...)
{
  int trRecLen;
  struct trcRec tr;
  int rc;
  va_list listP;
  
  va_start(listP, pos);
  rc = STraceFormat(&trRecLen, &tr, hookword, nArgs, pos, listP);
  va_end(listP);

  if (rc != 0)
    return;

  trc_write_internal(NULL, (const char *)&tr, trRecLen, NULL, true);
}

extern void 
_STrace(int hookword, int nArgs, int pos, ...)
{
  int trRecLen;
  struct trcRec tr;
  int rc;
  va_list listP;
  
  va_start(listP, pos);
  rc = STraceFormat(&trRecLen, &tr, hookword, nArgs, pos, listP);
  va_end(listP);

  if (rc != 0)
    return;

  trc_write_internal(NULL, (const char *)&tr, trRecLen, NULL, false);
}

/* Build a trace record in *trP whose data is a single printf-style formatted
 * string (format code _TR_FORMAT_X).
 *
 * trRecLenP - receives the total record length (data header + padded string).
 * trP       - record to fill in.
 * hookword  - trace id stored in the header.
 * fmt/vargs - format string and its arguments.
 *
 * Returns 0 on success, -1 if tracing is not active. */
int
XTraceFormat(int *trRecLenP, struct trcRec *trP, int hookword, char *fmt, 
             va_list vargs)
{
  int stringLen;

  if (lxthe.state != trc_active)
    return -1;

  /* Initialize trace header and format data into buffer, being careful not
     to run off the end of the buffer */
  trP->hdr.trHook = hookword;
  trP->hdr.trNArgs = 0;
  trP->hdr.trSPos = _TR_FORMAT_X;

  /* stringLen = vsnprintf(tr.data, sizeof(tr.data), fmt, vargs);
   * Unfortunately, vsnprintf does not exist in the kernel.  Use vsprintf and
   * test for overflow after the fact. 
   * NOTE(review): by the time the overflow is detected vsprintf has already
   * written past trP->data; the test below only limits further damage.
   */
  stringLen = vsprintf(trP->data, fmt, vargs) + 1;
  if (stringLen <= 0  ||  stringLen > sizeof(trP->data))
  {
    /* sizeof yields a size_t; convert explicitly so the argument matches
     * the %d conversion on every platform. */
    printk("_XTrace: argument too long.  len=%d max=%d hook=0x%X\n",
           stringLen, (int)(sizeof(trP->data)-1), trP->hdr.trHook);
    trP->data[sizeof(trP->data)-1] = '\0';
    stringLen = strlen(trP->data) + 1;
  }

  /* The string slot is padded to a multiple of ARGLEN. */
  trP->hdr.trSLen = ((stringLen+ARGLEN-1)/ARGLEN)*ARGLEN;
  *trRecLenP = sizeof(trc_datahdr_t) + trP->hdr.trSLen;
  /*  DBGASSERT(trRecLen <= LXTRACE_MAX_DATA); */

  return 0;
}

extern void 
_XTraceNB(int hookword, char *fmt, ...)
{
  struct trcRec tr;
  int trRecLen;
  int rc;
  va_list vargs;
  
  va_start(vargs, fmt);
  rc = XTraceFormat(&trRecLen, &tr, hookword, fmt, vargs);
  va_end(vargs);

  if (rc != 0) 
    return;

  trc_write_internal(NULL, (const char *) &tr, trRecLen, NULL, true);
}

extern void 
_XTrace(int hookword, char *fmt, ...)
{
  struct trcRec tr;
  int trRecLen;
  int rc;
  va_list vargs;
  
  va_start(vargs, fmt);
  rc = XTraceFormat(&trRecLen, &tr, hookword, fmt, vargs);
  va_end(vargs);

  if (rc != 0)
    return;

  trc_write_internal(NULL, (const char *) &tr, trRecLen, NULL, false);
}

/* Module initialization: construct the trace header, then register the
 * character device.  The result of trc_register is returned to the caller.
 * The entry point is init_module when built as a module and tracedev_int
 * otherwise. */
#ifdef MODULE
int init_module(void)
#else
int tracedev_int(void)
#endif /* MODULE */
{
  int rc;

  trc_init();
  rc = trc_register();

  return rc;
}

/* Module unload: undo module initialization in reverse order.  The entry
 * point is cleanup_module when built as a module and tracedev_cleanup
 * otherwise. */
#ifdef MODULE
void cleanup_module(void)
#else
void tracedev_cleanup(void)
#endif /* MODULE */
{
  /* Unregister first so the device can no longer be opened ... */
  trc_unregister();
  /* ... then release the trace buffers held by the header. */
  trc_term();
}

#else

/* stub tracedev module for GPFS_PRINTF so that we can use mmfsenv */
#include <linux/version.h>
#include <linux/kernel.h>
#include <linux/module.h>

/* Stub module initialization used when GPFS_PRINTF is defined: no trace
 * device is created; the load is only logged.  Always returns 0. */
int init_module(void)
{
  printk("tracedev: init_module\n");
  return 0;
}

/* Stub module unload used when GPFS_PRINTF is defined: there is nothing to
 * release; the unload is only logged. */
void cleanup_module(void)
{
  printk("tracedev: cleanup_module\n");
}

#endif /* !GPFS_PRINTF */
